Introduction

GTestimate can not only normalize scRNA-seq data but is also well suited for Spatial Transcriptomics data-sets from technologies such as 10X Visium.

We will process the stxBrain anterior1 example data-set using NormalizeData(), and GTestimate()

nCount_plot <- SpatialFeaturePlot(brain, features = "nCount_Spatial") + theme(legend.position = "top")

brain_ML <- NormalizeData(brain, assay = "Spatial") %>%
  FindVariableFeatures() %>%
  ScaleData() %>%
  RunPCA() %>%
  FindNeighbors(dims = 1:30) %>%
  FindClusters() %>%
  RunUMAP(dims = 1:30)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2696
## Number of edges: 82197
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8831
## Number of communities: 19
## Elapsed time: 0 seconds
brain_GT <- GTestimate::GTestimate(brain, assay = "Spatial")  %>%
  FindVariableFeatures() %>%
  ScaleData() %>%
  RunPCA() %>%
  FindNeighbors(dims = 1:30) %>%
  FindClusters() %>%
  RunUMAP(dims = 1:30)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2696
## Number of edges: 81483
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8820
## Number of communities: 17
## Elapsed time: 0 seconds

Clustering

This analysis results in two different clusterings, one with 17 and one with 19 clusters.

To visualize the differences between clusterings we can compare the clusters using the Jaccard Index and the Hungarian Algorithm:

jaccard_tbl <- tibble(Cell = colnames(brain_ML), ML = brain_ML$seurat_clusters) %>%
  left_join(tibble(Cell = colnames(brain_GT), GT = brain_GT$seurat_clusters)) %>%
  group_by(ML, GT) %>%
  summarise(cell_count = n()) %>%
  group_by(ML) %>%
  mutate(total_ML = sum(cell_count)) %>%
  group_by(GT) %>%
  mutate(total_GT = sum(cell_count)) %>%
  mutate(total = total_GT + total_ML - cell_count) %>%
  group_by(ML, GT) %>%
  mutate(`Jaccard Index` = cell_count/total) %>%
  select(ML, GT, `Jaccard Index`)


jaccard_tbl <- expand_grid(ML = unique(jaccard_tbl$ML), GT = unique(jaccard_tbl$GT)) %>%
  left_join(jaccard_tbl) %>% 
  mutate(`Jaccard Index` = replace(`Jaccard Index`, is.na(`Jaccard Index`), 0))

jaccard_tmp <- jaccard_tbl %>%
  pivot_wider(names_from = c(GT), values_from = `Jaccard Index`)

jaccard_mtx <- as.matrix(jaccard_tmp[,-1])
rownames(jaccard_mtx) <- as.numeric(as.character(jaccard_tmp$ML))

hungarian_solve <- HungarianSolver(1-jaccard_mtx)

jaccard_tbl <- jaccard_tbl %>%
  mutate(GT = factor(GT, levels = colnames(jaccard_mtx[hungarian_solve$pairs[,1], hungarian_solve$pairs[,2]])))

jaccard_plot <- ggplot(jaccard_tbl) +
  geom_tile(aes(x = ML, y = GT, fill = `Jaccard Index`)) +
  scale_fill_viridis_c() +
  xlab('NormalizeData') +
  ylab('GTestimate')

jaccard_plot

ggsave(filename = str_glue('{folder}jaccard_plot.pdf'), jaccard_plot, width = 4.5, height = 3)
saveRDS(file = str_glue('{folder}jaccard_plot.Rds'), jaccard_plot)

Now we can visualize the clustering dependent on the normalization method in 2D using Seurat’s SpatialDimPlot():

slide_image <- SpatialDimPlot(brain_GT, alpha = 0) +
  theme(legend.position = 'none',
        title = element_text(size = font_title)) +
  ggtitle('Slide')
clustering_ML <- SpatialDimPlot(brain_ML, label = T, label.size = 3, label.color = 'black', stroke = 0, pt.size.factor = 1.7) +
  theme(legend.position = 'none',
        title = element_text(size = font_title)) +
  ggtitle('NormalizeData')
clustering_GT <- SpatialDimPlot(brain_GT, label = T, label.size = 3, label.color = 'black', stroke = 0, pt.size.factor = 1.7) +
  theme(legend.position = 'none',
        title = element_text(size = font_title)) +
  ggtitle('GTestimate')

#st_clustering_plots <- slide_image + clustering_ML + clustering_GT
st_clustering_plots <- clustering_ML + clustering_GT

st_clustering_plots

ggsave(str_glue('{folder}st_clustering.png'), st_clustering_plots, width = 6.6, height = 4.4)
saveRDS(file = str_glue('{folder}st_clustering_plots.Rds'), st_clustering_plots)

Normalized Expression

We can also plot the log-normalized expression of cell type specific marker genes such as Ttr dependent on the normalization method used:

SpatialColors <- colorRampPalette(colors = rev(x = brewer.pal(n = 11, name = "Spectral")))

gene <- 'Ttr'
brain_ML_limited <- SetAssayData(brain_ML, layer = 'RNA', slot = 'data', new.data = replace(GetAssayData(brain_ML, layer = 'RNA', slot = 'data'), list = GetAssayData(brain_ML) > 3, 3))
brain_GT_limited <- SetAssayData(brain_GT, layer = 'RNA', slot = 'data', new.data = replace(GetAssayData(brain_GT, layer = 'RNA', slot = 'data'), list = GetAssayData(brain_GT) > 3, 3))
color_range <- c(0, max(c(GetAssayData(brain_ML_limited, slot = 'data')[gene,], GetAssayData(brain_GT_limited, slot = 'data')[gene,])))

TTr_spatial_plot_ML <- SpatialFeaturePlot(brain_ML_limited, features = gene, slot = 'data', stroke = 0, pt.size.factor = 1.7) +
  ggtitle('NormalizeData') +
  scale_fill_gradientn(limits = color_range, colours=SpatialColors(n=100), breaks = c(0,1,2,3), labels = c(0,1,2,'\u2265 3')) +
  theme(legend.margin = margin(0,0,0,0),
        legend.box.spacing = unit(0, 'cm'),
        legend.position = 'bottom',
        legend.text = element_text(size = font_axis),
        legend.key.size = legend_key_size*0.75,
        title = element_text(size = font_title))

TTr_spatial_plot_GT <- SpatialFeaturePlot(brain_GT_limited, features = gene, slot = 'data', stroke = 0, pt.size.factor = 1.7) +
  ggtitle('GTestimate') +
  scale_fill_gradientn(limits = color_range, colours=SpatialColors(n=100), breaks = c(0,1,2,3), labels = c(0,1,2,'\u2265 3')) +
  theme(legend.position = 'none',
        title = element_text(size = font_title),
        legend.text = element_text(size = font_axis))
#TTr_combined <- slide_image + TTr_spatial_plot_ML + TTr_spatial_plot_GT
TTr_combined <- TTr_spatial_plot_ML + TTr_spatial_plot_GT

TTr_combined

ggsave(str_glue('{folder}TTr_combined.png'), TTr_combined, width = 6.6, height = 2.8)
saveRDS(file = str_glue('{folder}TTr_combined.Rds'), TTr_combined)

To make the difference in relative expression estimation between NormalizeData() and GTestimate() more obvious we can plot the relative difference in relative expression of the example gene between NormalizeData() and GTestimate()

brain_ML$ML_vs_GT <- (GetAssayData(brain_ML, slot = 'data')['Ttr',] - GetAssayData(brain_GT, slot = 'data')['Ttr',])/GetAssayData(brain_ML, slot = 'data')['Ttr',] * 100
brain_ML$ML_vs_GT <- replace(brain_ML$ML_vs_GT, is.na(brain_ML$ML_vs_GT), 0)

relative_breaks <- c(0,20,40)

brain_ML_GT_lim <- brain_ML
brain_ML_GT_lim$ML_vs_GT <- replace(brain_ML_GT_lim$ML_vs_GT, brain_ML_GT_lim$ML_vs_GT > 40 , 40)

TTr_ML_vs_GT <- SpatialFeaturePlot(brain_ML_GT_lim, features = "ML_vs_GT", stroke = 0, pt.size.factor = 1.7) +
  theme(legend.position = 'bottom',
        title = element_text(size = font_title),
        legend.text = element_text(size = font_axis)) +
  scale_fill_viridis_c(name = expression(paste(Delta,phantom(x), Ttr, sep = '')),
                       breaks = relative_breaks,
                       labels = c('0','20','>40%'))

TTr_ML_vs_GT

ggsave(str_glue('{folder}TTr_ML_vs_GT.png'), TTr_ML_vs_GT, width = 2.2, height = 2.8)
saveRDS(file = str_glue('{folder}TTr_ML_vs_GT.Rds'), TTr_ML_vs_GT)

brain_ML[['UMIs/spot']] <- brain_ML$nCount_Spatial %>% replace(brain_ML$nCount_Spatial > 40000, 40000)

nReads_plot <- SpatialFeaturePlot(brain_ML, features = 'UMIs/spot', stroke = 0, pt.size.factor = 1.7) +
  theme(legend.position = 'bottom',
        title = element_text(size = font_title),
        legend.text = element_text(size = font_axis),
        legend.title = element_text(face = 'italic')) +
  scale_fill_viridis_c(breaks = c(5000,20000,40000),
                       labels = c('5000', '20000', '\u2265 40000'))
nReads_plot

ggsave(str_glue('{folder}nReads_plot.png'), nReads_plot, width = 2.2, height = 2.8)
saveRDS(file = str_glue('{folder}nReads_plot.Rds'), nReads_plot)

The amount of missing-mass in Spatial Transcriptomics data-set is generally smaller due to the higher nCount per spot compared to scRNA-seq data

ggplot(tibble(`Missing Mass` = brain_GT$missing_mass)) + geom_histogram(aes(x = `Missing Mass`), bins = 30, fill = "#3182BD", color = 'white') + theme_classic()

The clustering of spots can also be visualized in UMAP space without considering the Spatial Coordinates, and shows a more continous transition between clusters after GTestimate():

brain_umap_ML <- Embeddings(brain_ML, reduction = 'umap') %>%
  as.data.frame %>%
  rownames_to_column(var = 'Cell') %>%
  as_tibble %>%
  cbind(`Seurat clusters` = brain_ML$seurat_clusters) %>%
  mutate(Method = 'NormalizeData')

brain_umap_GT <- Embeddings(brain_GT, reduction = 'umap') %>%
  as.data.frame %>%
  rownames_to_column(var = 'Cell') %>%
  as_tibble %>%
  cbind(`Seurat clusters` = brain_GT$seurat_clusters) %>%
  mutate(Method = 'GTestimate')

brain_umap_combined <- rbind(brain_umap_ML, brain_umap_GT) %>% mutate(Method = factor(Method, levels = c('NormalizeData', 'GTestimate')))

brain_umap <- ggplot(brain_umap_combined) + geom_point(aes(x = umap_1, y = umap_2, col = `Seurat clusters`), size = 0.15) + facet_wrap(~Method, scales = 'free') + theme(legend.position = 'none')

brain_umap

ggsave(filename = str_glue('{folder}brain_umap.png'), brain_umap, width = 5, height = 2.5)
saveRDS(file = str_glue('{folder}brain_umap.Rds'), brain_umap)
spatial_hist_data <- tibble(`NormalizeData` = GetAssayData(brain_ML, assay = 'Spatial', slot = 'data')['Ttr',],
                           `GTestimate` = GetAssayData(brain_GT, assay = 'GTestimate', slot = 'data')['Ttr',]) %>%
  pivot_longer(cols = c(NormalizeData, GTestimate),values_to = 'expression', names_to = 'Method') %>%
  mutate(Method = fct_inorder(Method))

spatial_hist_plot <- ggplot(spatial_hist_data) + geom_density(aes(x = expression, fill = Method, col = Method), position = 'identity', alpha = 0.4) + scale_x_continuous(trans = 'log2') + theme(legend.position = 'bottom')

spatial_hist_plot

ggsave(filename = str_glue('{folder}spatial_hist_plot.pdf'), spatial_hist_plot, width = 5, height = 2.5)
saveRDS(file = str_glue('{folder}spatial_hist_plot.Rds'), spatial_hist_plot)